package com.fll.codeworld.poi;

import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import java.util.Arrays;
import java.util.List;

/**
 * Word document import test: extracts the plain text of a Word file with
 * Apache POI and echoes it to standard output.
 *
 * @author fff
 * @date 2021/7/23
 */
public class TestPoi {
    /** Sample document that is read when no path is supplied on the command line. */
    private static final String DEFAULT_FILE_PATH = "C:/Users/fll/Desktop/问题总结/avc/0提资清单.docx";

    /** Extension of the legacy binary Word format (recognised, but not readable here). */
    private static final String DOC_EXTENSION = ".doc";

    /** Extension of the OOXML Word format handled by {@link XWPFWordExtractor}. */
    private static final String DOCX_EXTENSION = ".docx";

    /**
     * Reads a Word file and prints its text.
     *
     * @param args optional; {@code args[0]} is the path of the file to read. When absent,
     *             {@link #DEFAULT_FILE_PATH} is used.
     */
    public static void main(String[] args) {
        String filePath = (args != null && args.length > 0) ? args[0] : DEFAULT_FILE_PATH;
        readWord(filePath);
    }

    /**
     * Extracts the text of the Word document at {@code path} and prints it line by line
     * to standard output.
     *
     * <p>Only {@code .docx} files are actually read. The extension check is
     * case-insensitive. A {@code .doc} path, a path with any other extension, and a
     * {@code null} path each produce a message on standard output and an empty result.
     *
     * @param path file system path of the document; may be {@code null}
     * @return the extracted text, or an empty string when the file is not a readable
     *         {@code .docx} document or when reading (including closing the file) fails;
     *         never {@code null}
     */
    public static String readWord(String path) {
        if (path == null) {
            System.out.println("此文件不是word文件！");
            return "";
        }

        if (hasExtension(path, DOCX_EXTENSION)) {
            try {
                String text = extractDocxText(path);
                for (String line : text.split("\n")) {
                    System.out.println(line);
                }
                return text;
            } catch (Exception e) {
                // Boundary catch: this method has always reported failures and returned an
                // empty string instead of propagating, so callers need no try/catch.
                System.err.println("Failed to read Word file: " + path);
                e.printStackTrace();
                return "";
            }
        }

        if (hasExtension(path, DOC_EXTENSION)) {
            // The legacy binary format would need HWPF's WordExtractor, which this class
            // does not import. Say so explicitly instead of silently returning nothing.
            System.out.println("暂不支持读取.doc格式的word文件！");
            return "";
        }

        System.out.println("此文件不是word文件！");
        return "";
    }

    /**
     * Opens the {@code .docx} package at {@code path}, extracts its text and releases the
     * file again, whether or not extraction succeeds.
     *
     * @param path path of an OOXML Word document
     * @return the text reported by the extractor
     * @throws Exception if the package cannot be opened, parsed or closed; declared broadly
     *                   because the POI constructor throws several checked types that this
     *                   file does not import
     */
    private static String extractDocxText(String path) throws Exception {
        OPCPackage opcPackage = POIXMLDocument.openPackage(path);
        POIXMLTextExtractor extractor = null;
        try {
            extractor = new XWPFWordExtractor(opcPackage);
            return extractor.getText();
        } finally {
            if (extractor != null) {
                // As in the original code, closing the extractor is relied on to release
                // the package it wraps.
                extractor.close();
            } else {
                // The extractor was never built, so nothing else owns the package.
                // revert() discards it without writing anything back to the file.
                opcPackage.revert();
            }
        }
    }

    /**
     * Tells whether {@code path} ends with {@code extension}, ignoring case.
     *
     * @param path      non-null path to inspect
     * @param extension extension including the leading dot, e.g. {@code ".docx"}
     * @return {@code true} if the path ends with the extension; {@code false} otherwise,
     *         including when the path is shorter than the extension
     */
    private static boolean hasExtension(String path, String extension) {
        int offset = path.length() - extension.length();
        // regionMatches returns false for a negative offset, so short paths are safe.
        return path.regionMatches(true, offset, extension, 0, extension.length());
    }
}
